home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Compendium Deluxe 2
/
LSD and 17bit Compendium Deluxe - Volume II.iso
/
a
/
prog
/
asmsrc
/
thesource-7.lha
/
Source
/
NumericalMethods.lha
/
NumericalMethods
/
sqrt.s
< prev
Wrap
Text File
|
1994-05-27
|
5KB
|
137 lines
Newsgroups: comp.graphics.algorithms
Path: usenet.ee.pdx.edu!cs.uoregon.edu!sgiblab!swrinde!cs.utexas.edu!howland.reston.ans.net!pipex!sunic!umdac!cs.umu.se!christer
From: christer@cs.umu.se (Christer Ericson)
Subject: Re: Fastest long integer square root algorithm?
Message-ID: <Cnn27G.DEv@cs.umu.se>
Sender: news@cs.umu.se (News Administrator)
Organization: Dept. of Computing Science, Umea Univ., 901 87 Umea, Sweden
References: <2neb6t$mcq@geraldo.cc.utexas.edu>
Date: Sat, 2 Apr 1994 15:40:25 GMT
Lines: 124
In <2neb6t$mcq@geraldo.cc.utexas.edu> Steve Mariotti <stevem@zaphod.ee.pitt.edu> writes:
>
>My goal in life right now is to find the fastest integer square root
>algorithm that is humanly possible. I know, I should probably set my
>sights a little higher.
>[...]
This piece of code is written to work on all 680x0-processors (which
seemed to be your target architechture). You can't go faster than this
on a 68000 without resorting to some sort of table lookup.
It is the basic Newton/Babylonian method, but with some interesting
twists.
--- cut here ---
unsigned short ce_quick_sqrt(n)
register unsigned long n;
{
asm {
;-------------------------
;
; Routine : ISQRT; integer square root
; I/O parameters: d0.w = sqrt(d0.l)
; Registers used: d0-d2/a0
;
; This is a highly optimized integer square root routine, based
; on the iterative Newton/Babylonian method
;
; r(n + 1) = (r(n) + A / R(n)) / 2
;
; Verified over the full interval [0x0L,0xFFFFFFFFL]
;
; Some minor compromises have been made to make it perform well
; on the 68000 as well as 68020/030/040. This routine outperforms
; the best of all other algorithms I've seen (except for a table
; lookup).
;
; Copyright (c) Christer Ericson, 1993. All rights reserved.
;
; Christer Ericson, Dept. of Computer Science, University of Umea,
; S-90187 UMEA, SWEDEN. Internet: christer@cs.umu.se
;
;-------------------------
;
; Macintosh preamble since THINK C passes first register param
; in d7. Remove this section on any other machine
;
move.l d7,d0
;-------------------------
;
; Actual integer square root routine starts here
;
move.l d0,a0 ; save original input value for the iteration
beq.s @exit ; unfortunately we must special case zero
moveq #2,d1 ; init square root guess
;-------------------------
;
; Speed up the process of finding a power of two approximation
; to the actual square root by shifting an appropriate amount
; when the input value is large enough
;
; If input values are word only, this section can be removed
;
move.l d0,d2
swap d2 ; do [and.l 0xFFFF0000,d2] this way to...
tst.w d2 ; go faster on 68000 and to avoid having to...
beq.s @skip8 ; reload d2 for the next test below
move.w #0x200,d1 ; faster than lsl.w #8,d1 (68000)
lsr.l #8,d0
@skip8 and.w #0xFE00,d2 ; this value and shift by 5 are magic
beq.s @skip4
lsl.w #5,d1
lsr.l #5,d0
@skip4
;-------------------------
;
; This finds the power of two approximation to the actual square root
; by doing the integer equivalent to sqrt(x) = 2 ^ (log2(x) / 2). This
; is done by shifting the input value down while shifting the root
; approximation value up until they meet in the middle. Better precision
; (in the step described below) is gained by starting the approximation
; at 2 instead of 1 (this means that the approximation will be a power
; of two too large so remember to shift it down).
;
; Finally (and here's the clever thing) since, by previously shifting the
; input value down, it has actually been divided by the root approximation
; value already so the first iteration is "for free". Not bad, eh?
;
@loop add.l d1,d1
lsr.l #1,d0
cmp.l d0,d1
bcs.s @loop
@skip lsr.l #1,d1 ; adjust the approximation
add.l d0,d1 ; here we just add and shift to...
lsr.l #1,d1 ; get the first iteration "for free"!
;-------------------------
;
; The iterative root value is guaranteed to be larger than (or equal to)
; the actual square root, except for the initial guess. But since the first
; iteration was done above, the loop test can be simplified below
; (Oh, without the bvs.s the routine will fail on most large numbers, like
; for instance, 0xFFFF0000. Could there be a better way of handling these,
; so the bvs.s can be removed? Nah...)
;
@loop2 move.l a0,d2 ; get original input value
move.w d1,d0 ; save current guess
divu.w d1,d2 ; do the Newton method thing
bvs.s @exit ; if div overflows, exit with current guess
add.w d2,d1
roxr.w #1,d1 ; roxr ensures shifting back carry overflow
cmp.w d0,d1
bcs.s @loop2 ; exit with result in d0.w
@exit
}
}
--- and here ---
Christer Ericson --- Internet: christer@cs.umu.se --- tel: +46-90-166794
Department of Computer Science, University of Umea, S-90187 UMEA, SWEDEN